{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_features = pd.read_csv(\"train_features.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>id</th>\n",
       "      <th>budget</th>\n",
       "      <th>popularity</th>\n",
       "      <th>runtime</th>\n",
       "      <th>revenue</th>\n",
       "      <th>has_collection</th>\n",
       "      <th>has_genres</th>\n",
       "      <th>has_companies</th>\n",
       "      <th>has_countries</th>\n",
       "      <th>...</th>\n",
       "      <th>pt</th>\n",
       "      <th>ro</th>\n",
       "      <th>ru</th>\n",
       "      <th>sr</th>\n",
       "      <th>sv</th>\n",
       "      <th>ta</th>\n",
       "      <th>te</th>\n",
       "      <th>tr</th>\n",
       "      <th>ur</th>\n",
       "      <th>zh</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>14000000</td>\n",
       "      <td>6.575393</td>\n",
       "      <td>93.0</td>\n",
       "      <td>12314651</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>40000000</td>\n",
       "      <td>8.248895</td>\n",
       "      <td>113.0</td>\n",
       "      <td>95149435</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3300000</td>\n",
       "      <td>64.299990</td>\n",
       "      <td>105.0</td>\n",
       "      <td>13092000</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1200000</td>\n",
       "      <td>3.174936</td>\n",
       "      <td>122.0</td>\n",
       "      <td>16000000</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1.148070</td>\n",
       "      <td>118.0</td>\n",
       "      <td>3923970</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 298 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0  id    budget  popularity  runtime   revenue  has_collection  \\\n",
       "0           0   1  14000000    6.575393     93.0  12314651               1   \n",
       "1           1   2  40000000    8.248895    113.0  95149435               1   \n",
       "2           2   3   3300000   64.299990    105.0  13092000               0   \n",
       "3           3   4   1200000    3.174936    122.0  16000000               0   \n",
       "4           4   5         0    1.148070    118.0   3923970               0   \n",
       "\n",
       "   has_genres  has_companies  has_countries  ...  pt  ro  ru  sr  sv  ta  te  \\\n",
       "0           1              1              1  ...   0   0   0   0   0   0   0   \n",
       "1           1              1              1  ...   0   0   0   0   0   0   0   \n",
       "2           1              1              1  ...   0   0   0   0   0   0   0   \n",
       "3           1              0              1  ...   0   0   0   0   0   0   0   \n",
       "4           1              0              1  ...   0   0   0   0   0   0   0   \n",
       "\n",
       "   tr  ur  zh  \n",
       "0   0   0   0  \n",
       "1   0   0   0  \n",
       "2   0   0   0  \n",
       "3   0   0   0  \n",
       "4   0   0   0  \n",
       "\n",
       "[5 rows x 298 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_features.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 3000 entries, 0 to 2999\n",
      "Columns: 298 entries, Unnamed: 0 to zh\n",
      "dtypes: float64(2), int64(296)\n",
      "memory usage: 6.8 MB\n"
     ]
    }
   ],
   "source": [
    "train_features.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_x=train_features.drop(['revenue','id'],axis=1)\n",
    "train_y=train_features['revenue']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.ensemble import AdaBoostRegressor\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "rf = RandomForestRegressor()\n",
    "parameters = {\"randomforestregressor__n_estimators\": range(10,30)}\n",
    "pipeline = Pipeline([ ('scaler', MinMaxScaler()), ('randomforestregressor', rf)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "clf = GridSearchCV(estimator=pipeline, param_grid=parameters)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv='warn', error_score='raise-deprecating',\n",
       "             estimator=Pipeline(memory=None,\n",
       "                                steps=[('scaler',\n",
       "                                        MinMaxScaler(copy=True,\n",
       "                                                     feature_range=(0, 1))),\n",
       "                                       ('randomforestregressor',\n",
       "                                        RandomForestRegressor(bootstrap=True,\n",
       "                                                              criterion='mse',\n",
       "                                                              max_depth=None,\n",
       "                                                              max_features='auto',\n",
       "                                                              max_leaf_nodes=None,\n",
       "                                                              min_impurity_decrease=0.0,\n",
       "                                                              min_impurity_split=None,\n",
       "                                                              min_samples_leaf=1,\n",
       "                                                              min_samples_split=2,\n",
       "                                                              min_weight_fraction_leaf=0.0,\n",
       "                                                              n_estimators='warn',\n",
       "                                                              n_jobs=None,\n",
       "                                                              oob_score=False,\n",
       "                                                              random_state=None,\n",
       "                                                              verbose=0,\n",
       "                                                              warm_start=False))],\n",
       "                                verbose=False),\n",
       "             iid='warn', n_jobs=None,\n",
       "             param_grid={'randomforestregressor__n_estimators': range(10, 30)},\n",
       "             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
       "             scoring=None, verbose=0)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf.fit(train_x,train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "最优分数： 0.6919\n",
      "最优参数： {'randomforestregressor__n_estimators': 11}\n"
     ]
    }
   ],
   "source": [
    "print(\"最优分数： %.4lf\" %clf.best_score_)\n",
    "print(\"最优参数：\", clf.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "reg = AdaBoostRegressor()\n",
    "parameters = {\"adaboostregressor__n_estimators\": range(10,100)}\n",
    "pipeline = Pipeline([ ('scaler', MinMaxScaler()), ('adaboostregressor', reg)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "clf_ada = GridSearchCV(estimator=pipeline, param_grid=parameters)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv='warn', error_score='raise-deprecating',\n",
       "             estimator=Pipeline(memory=None,\n",
       "                                steps=[('scaler',\n",
       "                                        MinMaxScaler(copy=True,\n",
       "                                                     feature_range=(0, 1))),\n",
       "                                       ('adaboostregressor',\n",
       "                                        AdaBoostRegressor(base_estimator=None,\n",
       "                                                          learning_rate=1.0,\n",
       "                                                          loss='linear',\n",
       "                                                          n_estimators=50,\n",
       "                                                          random_state=None))],\n",
       "                                verbose=False),\n",
       "             iid='warn', n_jobs=None,\n",
       "             param_grid={'adaboostregressor__n_estimators': range(10, 100)},\n",
       "             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
       "             scoring=None, verbose=0)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf_ada.fit(train_x,train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "最优分数： 0.6154\n",
      "最优参数： {'adaboostregressor__n_estimators': 12}\n"
     ]
    }
   ],
   "source": [
    "print(\"最优分数： %.4lf\" %clf_ada.best_score_)\n",
    "print(\"最优参数：\", clf_ada.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
